No Hyperparam Tuning¶

In [ ]:
import numpy as np
from utilities.dataset_jlb import CityDataset

# Training cities; "BerlinTest" is kept separate as the held-out test tile.
# FIX: the original list contained "London" twice — the dataset loader's own
# summary output lists only 10 unique cities, so the duplicate is removed.
cities = ["Aachen", "CapeTown", "Hamburg", "Johannesburg", "London",
          "Montreal", "Paris", "Seoul", "Singapore", "Sydney"]
In [ ]:
# One-time data preparation, kept for provenance. Presumably generates the
# building mask for the BerlinTest tile (`all_touched` suggests rasterio
# rasterization — verify in DataHandler.get_building_mask). Commented out
# because the mask file already exists on disk.
# from data_acquisition import DataHandler
# from utilities.utils import setup_logger
# logger = setup_logger(level="ERROR")
# dh = DataHandler(logger, "data")

# BerlinTest = dh.get_building_mask("BerlinTest", all_touched=True)
In [ ]:
# check if the mask is correct
import rasterio
from utilities.plot_utils import (plot_band_with_mask,
                                  histogram_scaler_bands,
                                  describe_tif,
                                  plot_bands)

# Sentinel-2 band name -> index into the `data` array read below
# (bands are read in the order [1..6] from openEO.tif).
b = {
    "B04": 0,
    "B03": 1,
    "B02": 2,
    "B08": 3,
    "B12": 4,
    "B11": 5,
}

# FIX: the absolute data location was hard-coded twice; hoisted so it is
# written only once.
BERLIN_DIR = "/home/jlb/Projects/architecture-of-ml-systems/data/BerlinTest"

with rasterio.open(f"{BERLIN_DIR}/openEO.tif") as src:
    describe_tif(src)
    data = src.read([1,2,3,4,5,6])
with rasterio.open(f"{BERLIN_DIR}/building_mask_dense.tif") as src:
    describe_tif(src)
    labels = src.read(1)

# Clip each band to its [1st, 99th] percentile range before plotting
# (the percentile bounds are printed by the call).
data = histogram_scaler_bands(data, 1.0, 99.0)

# RGB composite (B04/B03/B02) and the red band overlaid with the label mask.
plot_bands(data, bands=[b["B04"], b["B03"], b["B02"]], title="BerlinTest")
plot_band_with_mask(data[b["B04"]], labels, title="BerlinTest")
Profile:
	 {'driver': 'GTiff', 'dtype': 'int16', 'nodata': -32768.0, 'width': 1427, 'height': 1361, 'count': 6, 'crs': CRS.from_epsg(32633), 'transform': Affine(10.0, 0.0, 384100.0,
       0.0, -10.0, 5826300.0), 'blockxsize': 512, 'blockysize': 512, 'tiled': True, 'compress': 'deflate', 'interleave': 'band'}
SHAPE:		 (1361, 1427)
dtype		 int16
max		 10608
min		 98
mean		 772.9615595523923
std		 477.7241897203279
sum		 1501204974
Profile:
	 {'driver': 'GTiff', 'dtype': 'int16', 'nodata': -32768.0, 'width': 1427, 'height': 1361, 'count': 1, 'crs': CRS.from_epsg(32633), 'transform': Affine(10.0, 0.0, 384100.0,
       0.0, -10.0, 5826300.0), 'blockysize': 2, 'tiled': False, 'interleave': 'band'}
SHAPE:		 (1361, 1427)
dtype		 int16
max		 1
min		 0
mean		 0.40554036331956334
std		 0.4909963106156703
sum		 787619
p_down, p_up [200. 296. 232. 288. 219. 313.] [2545. 2351. 2146. 4188. 2732. 3103.]
No description has been provided for this image
No description has been provided for this image
In [ ]:
# FIX: the absolute data root was hard-coded twice; hoisted to one constant.
DATA_ROOT = "/home/jlb/Projects/architecture-of-ml-systems/data"

# Training dataset: 16x16 patches from the training cities.
# min_labels=0.1 presumably drops patches with <10% positive label pixels —
# confirm against CityDataset; the split summary later shows a 0.1 minimum.
dataset = CityDataset(DATA_ROOT,
                      patch_size=16,
                      data_name="openEO.tif",
                      labels_name="building_mask_dense.tif",
                      image_bands=[1,2,3,4,5,6],
                      min_labels=0.1,
                      cities=cities,
                      train=True,)

# Test dataset: the held-out BerlinTest tile; with train=False the image is
# not patched (the test sample printed below is the full 1361x1427 tile).
dataset_test = CityDataset(DATA_ROOT,
                            data_name="openEO.tif",
                            labels_name="building_mask_dense.tif",
                            image_bands=[1,2,3,4,5,6],
                            cities=["BerlinTest"],
                            train=False)
                           
Loading data from cities:
['Singapore', 'Johannesburg', 'London', 'Montreal', 'Seoul', 'Aachen', 'CapeTown', 'Hamburg', 'Paris', 'Sydney']
Loading Images:   0%|          | 0/10 [00:00<?, ?it/s]
Loading Labels:   0%|          | 0/10 [00:00<?, ?it/s]
Creating Patches from Images: 0it [00:00, ?it/s]
Loading data from cities:
['BerlinTest']
Loading Images:   0%|          | 0/1 [00:00<?, ?it/s]
Loading Labels:   0%|          | 0/1 [00:00<?, ?it/s]
In [ ]:
# it is possible to update the patch size without reloading the dataset
# (re-cuts patches from the already-loaded images; see progress bar below)
dataset.update_patch_size(32) # 32x32 patches
Creating Patches from Images: 0it [00:00, ?it/s]
In [ ]:
# torch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# from torch.utils.tensorboard import SummaryWriter
import lightning as L
from typing import Any
from torch.utils.data import DataLoader, Dataset
from lightning import seed_everything



# Throughput measured on this machine (see notes); 32 chosen as a balance.
# batch size 8 with 16x16 patches GPU memory: 36%, 117 it/s
# batch size 16 with 16x16 patches GPU memory: 37%, 117 it/s
# batch size 32 with 16x16 patches GPU memory: 40%, 90 it/s
batch_size = 32

# 90/10 train/val split; n_groups presumably controls how patches are grouped
# before splitting (to limit spatial leakage) — verify in
# CityDataset.train_val_split. show_summary prints the label statistics below.
train_dataset, val_dataset = dataset.train_val_split(val_size=0.1, 
                                                     n_groups=100, 
                                                     random_state=42, 
                                                     show_summary=True)
train_dl = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=20)
val_dl = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=20)


# Sanity checks: dataset sizes and the shape of one batch from each loader.
print(f"Len total dataset: {len(dataset)}")
print(f"len train dataset: {len(train_dataset)}")
print(f"len val dataset: {len(val_dataset)}")

sample_train = next(iter(train_dl))
sample_val = next(iter(val_dl))
print("Training Sample (data, labels)",sample_train["data"].shape, sample_train["labels"].shape)
print("Validation Sample (data, labels)", sample_val["data"].shape, sample_val["labels"].shape)
print("Len train", len(train_dl))
print("Len val", len(val_dl))
Train:
Number of samples: 43038
Shape of Train data (data, label) (6, 32, 32) (32, 32)
Val:
Number of samples: 4783
Shape of Val data (data, label) (6, 32, 32) (32, 32)
**************************************************
Mean percentage of 1 labels in train: 0.3563743586699545
Mean percentage of 1 labels in val: 0.3564508251881664
Mean percentage of 1 labels in all data: 0.35638200676088955
**************************************************
Std of percentage 1 labels in train: 0.18495708352186507
Std of percentage 1 labels in val: 0.18479195943286666
Std of percentage 1 labels in all data: 0.18494057606377182
**************************************************
Min percentage of 1 labels in train: 0.1005859375
Min percentage of 1 labels in val: 0.1005859375
Min percentage of 1 labels in all data: 0.1005859375
**************************************************
Max percentage of 1 labels in train: 0.9814453125
Max percentage of 1 labels in val: 0.98828125
Max percentage of 1 labels in all data: 0.98828125
**************************************************
Len total dataset: 47821
len train dataset: 43038
len val dataset: 4783
Training Sample (data, labels) torch.Size([32, 6, 32, 32]) torch.Size([32, 32, 32])
Validation Sample (data, labels) torch.Size([32, 6, 32, 32]) torch.Size([32, 32, 32])
Len train 1345
Len val 150
In [ ]:
# Test loader: batch_size=1 because each sample is a full city tile rather
# than a patch (shapes printed below: [1, 6, 1361, 1427]).
test_dl = DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=20)
sample_test = next(iter(test_dl))
print(sample_test["data"].shape, sample_test["labels"].shape)
torch.Size([1, 6, 1361, 1427]) torch.Size([1, 1361, 1427])

Load Model¶

In [ ]:
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks.model_checkpoint import ModelCheckpoint
from utilities.Lightning_utils import LitModule
from utilities.Lightning_utils import ConvNetSimple

# model: simple conv baseline on the 6 Sentinel-2 input bands
convmodel = LitModule(ConvNetSimple(channels=6))

# trainer factory — hard-coded tuning knobs are now defaulted parameters so
# later experiments can vary them without editing this cell.
def get_trainer(directory, patience=2, max_epochs=100, save_top_k=2):
    """Build a Lightning Trainer that early-stops and checkpoints on val_loss.

    Args:
        directory: subdirectory under models/exp01/ for logs and checkpoints.
        patience: epochs without val_loss improvement before early stopping.
        max_epochs: hard upper bound on training epochs.
        save_top_k: number of best checkpoints to keep.

    Returns:
        A configured lightning.Trainer.
    """
    ckpt_dir = f"models/exp01/{directory}"  # hoisted: was repeated twice
    trainer = L.Trainer(
        default_root_dir=ckpt_dir,
        callbacks=[
            EarlyStopping(
                monitor="val_loss",
                mode="min",
                patience=patience,
            ),
            ModelCheckpoint(
                monitor="val_loss",
                mode="min",
                save_top_k=save_top_k,
                dirpath=ckpt_dir,
                # NOTE(review): with save_top_k > 1 and a fixed filename,
                # Lightning versions the extra checkpoints (best_model-v1, ...).
                filename="best_model"
            )
        ],
        # val_check_interval=1,
        fast_dev_run=False,
        num_sanity_val_steps=2,
        max_epochs=max_epochs,
        log_every_n_steps=20,
    )
    return trainer

Tune LR¶

In [ ]:
from lightning.pytorch.tuner.tuning import Tuner
In [ ]:
trainer = get_trainer("ConvNetSimple")

seed_everything(49)
tuner = Tuner(trainer=trainer)
torch.set_float32_matmul_precision('high') # for tensor cores

# LR range test: sweeps lr in [1e-6, 0.01] over 5000 steps; per the log
# output below, the suggested rate is applied to convmodel automatically
# ("Learning rate set to ...").
tuner.lr_find(convmodel, train_dl, val_dl, min_lr=1e-6, max_lr=0.01, num_training=5000)

# good lr: 0.001174897554939528
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Seed set to 49
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Finding best initial lr:   0%|          | 0/5000 [00:00<?, ?it/s]
`Trainer.fit` stopped: `max_steps=5000` reached.
Learning rate set to 0.0016143585568264868
Restoring states from the checkpoint path at models/exp01/ConvNetSimple/.lr_find_a760014e-991e-4f02-b8d7-9a47d7432101.ckpt
Restored all states from the checkpoint at models/exp01/ConvNetSimple/.lr_find_a760014e-991e-4f02-b8d7-9a47d7432101.ckpt
Out[ ]:
<lightning.pytorch.tuner.lr_finder._LRFinder at 0x7fecbf4f0d90>

Train Model¶

In [ ]:
# Train the conv baseline; early stopping / checkpointing come from
# get_trainer, and the tuner above already set convmodel's learning rate.
trainer.fit(convmodel, 
    train_dataloaders=train_dl,
    val_dataloaders=val_dl   
)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  | Name  | Type          | Params | Mode 
------------------------------------------------
0 | model | ConvNetSimple | 94.2 K | train
1 | loss  | BCELoss       | 0      | train
------------------------------------------------
94.2 K    Trainable params
0         Non-trainable params
94.2 K    Total params
0.377     Total estimated model params size (MB)
Sanity Checking: |          | 0/? [00:00<?, ?it/s]
Training: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
In [ ]:
# Evaluate the best (lowest val_loss) conv checkpoint on the held-out
# BerlinTest tile.
best_model_conv = LitModule.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
trainer.test(model=best_model_conv, dataloaders=test_dl)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Testing: |          | 0/? [00:00<?, ?it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     test_loss_epoch        0.38404497504234314
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Out[ ]:
[{'test_loss_epoch': 0.38404497504234314}]
In [ ]:
# plot the output of the test
import matplotlib.pyplot as plt

sample = next(iter(test_dl))
print(f"Sample shape: {sample['data'].shape}")
# Manual forward pass kept for reference; trainer.predict below runs the
# same inference via Lightning.
# convmodel.eval()
# output = convmodel(sample["data"])
# print(output.shape)

prediction = trainer.predict(model=best_model_conv, dataloaders=test_dl)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Sample shape: torch.Size([1, 6, 1361, 1427])
Predicting: |          | 0/? [00:00<?, ?it/s]
In [ ]:
# Collapse the (1, 1, H, W) prediction tensor to a 2-D numpy array
# for the plotting helpers below.
print(prediction[0].shape)
output = prediction[0].detach().numpy().squeeze()
print(output.shape)
torch.Size([1, 1, 1361, 1427])
(1361, 1427)
In [ ]:
from utilities.plot_utils import (
    plot_prediction_with_thresholds,
    plot_random_patch,
    plot_output
)

# Visualize the full prediction map, one random 6x6 patch (its values are
# also printed), and the prediction rendered at several thresholds.
plot_output(output)
plot_random_patch(output, patch_len=6)
plot_prediction_with_thresholds(output)
No description has been provided for this image
No description has been provided for this image
[[0.01106262 0.01344627 0.01856582 0.02052435 0.04994096 0.0941106 ]
 [0.0394295  0.04609582 0.0433219  0.04410533 0.06069243 0.06888498]
 [0.02360394 0.02088859 0.01290968 0.01555219 0.02547724 0.03814513]
 [0.03192084 0.0317886  0.02858587 0.02829278 0.04094206 0.0914407 ]
 [0.18983711 0.21338607 0.17830016 0.08393327 0.12551358 0.23672046]
 [0.32559654 0.32085553 0.39692518 0.28868127 0.34046698 0.36366984]]
No description has been provided for this image

Model U-Net¶

In [ ]:
# import torchmetrics
# from torchmetrics import Dice
from utilities.unet import UNet

# model: single output channel trained with BCEWithLogitsLoss, so the
# network emits raw logits (sigmoid is applied at inference time later).
unet = UNet(n_channels=len(dataset.get_image_bands()), n_classes=1, bilinear=True)


# UNet implementation uses the BCEWithLogitsLoss, lr of 1e-5 default
unet_lit = LitModule(unet, learning_rate=1e-4, loss=nn.BCEWithLogitsLoss())

torch.set_float32_matmul_precision('high')
unet_trainer = get_trainer("unet")

seed_everything(49)
# lr finder — NOTE: per the log below ("Learning rate set to ..."), this
# overrides the learning_rate passed to LitModule above.
tuner = Tuner(unet_trainer)
tuner.lr_find(unet_lit, train_dl, val_dl, min_lr=1e-6, max_lr=0.01, num_training=5000)
# good lr: 0.0031915378551007614

unet_trainer.fit(unet_lit, 
    train_dataloaders=train_dl,
    val_dataloaders=val_dl   
)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Seed set to 49
Missing logger folder: models/exp01/unet/lightning_logs
/home/jlb/Projects/architecture-of-ml-systems/.venv/lib/python3.10/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:652: Checkpoint directory /home/jlb/Projects/architecture-of-ml-systems/models/exp01/unet exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Finding best initial lr:   0%|          | 0/5000 [00:00<?, ?it/s]
`Trainer.fit` stopped: `max_steps=5000` reached.
Learning rate set to 0.0008534930716135486
Restoring states from the checkpoint path at models/exp01/unet/.lr_find_59fc8274-d55b-45bb-b76a-705e2fcc05f6.ckpt
Restored all states from the checkpoint at models/exp01/unet/.lr_find_59fc8274-d55b-45bb-b76a-705e2fcc05f6.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type              | Params | Mode 
----------------------------------------------------
0 | model | UNet              | 17.3 M | train
1 | loss  | BCEWithLogitsLoss | 0      | train
----------------------------------------------------
17.3 M    Trainable params
0         Non-trainable params
17.3 M    Total params
69.059    Total estimated model params size (MB)
Sanity Checking: |          | 0/? [00:00<?, ?it/s]
Training: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
Validation: |          | 0/? [00:00<?, ?it/s]
In [ ]:
# Evaluate the best U-Net checkpoint on the held-out BerlinTest tile.
best_model = LitModule.load_from_checkpoint(unet_trainer.checkpoint_callback.best_model_path)
unet_trainer.test(model=best_model, dataloaders=test_dl)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Testing: |          | 0/? [00:00<?, ?it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     test_loss_epoch        0.35365039110183716
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Out[ ]:
[{'test_loss_epoch': 0.35365039110183716}]
In [ ]:
# unet_trainer.test(model=unet_lit, dataloaders=test_dl)
In [ ]:
prediction = unet_trainer.predict(model=best_model, dataloaders=test_dl)
# prediction = unet_trainer.predict(model=unet_lit, dataloaders=test_dl)
print(prediction[0].shape)
# prediction = F.interpolate(prediction[0], (output.shape[2], output.shape[3]), mode="bilinear")


# The U-Net was trained with BCEWithLogitsLoss, so its outputs are raw
# logits; apply sigmoid to map them to probabilities in [0, 1].
output = torch.sigmoid(prediction[0]).detach().numpy()
# output = prediction[0].detach().numpy()
print(output.shape)
# Drop the leading (1, 1, ...) batch/channel dims for the 2-D plot helpers.
output = output.squeeze()
print(output.shape)
# output
plot_output(output)
plot_random_patch(output, patch_len=6)
plot_prediction_with_thresholds(output)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Predicting: |          | 0/? [00:00<?, ?it/s]
torch.Size([1, 1, 1361, 1427])
(1, 1, 1361, 1427)
(1361, 1427)
No description has been provided for this image
No description has been provided for this image
[[0.03297826 0.03040841 0.03067048 0.0410015  0.0494289  0.04906508]
 [0.05410413 0.04744549 0.06491446 0.08485103 0.100301   0.08138527]
 [0.03333778 0.03750996 0.04904346 0.05081847 0.0359347  0.02989559]
 [0.06588066 0.07073054 0.07238087 0.06794988 0.05650675 0.07369687]
 [0.22064649 0.17509465 0.1439425  0.13441636 0.13590924 0.13892187]
 [0.2591825  0.19466874 0.16221802 0.13725671 0.11017903 0.12332146]]
No description has been provided for this image